#!/usr/bin/python
# -*- coding: UTF-8 -*-
import requests
from lxml import html
from selenium import webdriver


def spider(url):
    """Return the first media URI listed in the m3u8 playlist of an episode page.

    The page is opened with the module-level Selenium ``driver`` (created in
    the ``__main__`` section). The playlist address is taken from the query
    string of the second iframe under ``td#playleft``, the playlist is
    downloaded, and its first entry is resolved against the playlist address.

    Args:
        url: Address of the episode page to open in the browser.

    Returns:
        Absolute URL of the first non-blank playlist line that does not start
        with ``#``.

    Raises:
        IndexError: If the expected iframe, its ``src`` attribute, or the
            ``?name=value`` part of that ``src`` is missing.
        requests.HTTPError: If the playlist request returns an error status.
        ValueError: If the playlist contains no media entry.
    """
    # Imported locally so this function stays self-contained.
    from urllib.parse import urljoin

    driver.get(url)
    # Parse the rendered page so it can be queried with XPath.
    selector = html.fromstring(driver.page_source)

    info_list = selector.xpath('//td[@id="playleft"]/iframe')

    # The second iframe is used; its src is expected to look like
    # "<player page>?<name>=<m3u8 address>".
    # NOTE(review): a playlist address that itself contains "?" or "=" would
    # be truncated by this split -- confirm against the real page markup.
    src = info_list[1].xpath('@src')[0]
    m3u8_url = str(src.split("?")[1].split('=')[1])

    r = requests.request('get', m3u8_url, timeout=60)  # 60-second timeout
    # Fail loudly instead of parsing an HTTP error page as a playlist.
    r.raise_for_status()
    r.encoding = r.apparent_encoding  # decode using the detected encoding

    for line in r.text.splitlines():
        entry = line.strip()  # also drops a trailing "\r" from CRLF playlists
        if entry and not entry.startswith("#"):
            # urljoin handles absolute, root-relative and relative entries.
            return urljoin(m3u8_url, entry)

    raise ValueError("no media entry found in playlist: {}".format(m3u8_url))


if __name__ == '__main__':
    # Collect the m3u8 address of every episode listed on the series page and
    # write them to a text file: one address line followed by one
    # "<name>-<episode number>" line per episode.
    url = "https://www.cmdy2020.com/guochanju/tianlongbabuhujunban.html"
    # NOTE: `driver` must stay a module-level name; spider() reads it as a global.
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # Parse the rendered page so it can be queried with XPath.
        selector = html.fromstring(driver.page_source)

        video_name = selector.xpath('//dt[@class="name"]/text()')[0]
        print("正在获取{name}的m3u8视频下载地址".format(name=video_name))
        href_list = selector.xpath('//div[@id="stab1"]/div[1]/div[@class="videourl clearfix"]/ul/li/a/@href')
        # Convert lxml's string results to plain str before reuse.
        url_list = [str(href) for href in href_list]

        out_path = 'G:/{name}m3u8下载地址.txt'.format(name=video_name)
        # Explicit UTF-8 so the Chinese title is written the same way
        # regardless of the machine's locale; `with` closes the file even
        # when an episode fails.
        with open(out_path, 'w', encoding='utf-8') as out_file:
            for index, episode_url in enumerate(url_list, 1):
                out_file.write(spider(episode_url) + "\n")
                out_file.write('{name}-{index}'.format(name=video_name, index=index) + "\n")
                print("获取{index}集成功, 地址: {url}".format(index=index, url=episode_url))
    finally:
        # quit() ends the whole WebDriver session; close() would only close
        # the current window.
        driver.quit()
